dat %>%
ggplot(aes(year, lakeid, fill=is.na(daynum))) +
geom_tile(color="grey") +
theme_bw() +
facet_wrap(~metric, nrow=6)
# labs(fill = "Missing") +
# scale_fill_scico(palette = 'imola')
# dat %>%
# ggplot(aes(year, lakeid, fill=daynum)) +
# geom_tile(color="grey") +
# theme_bw() +
# facet_wrap(~metric, nrow=6) +
# # labs(fill = "Missing") +
# scale_fill_viridis_c()
Remaining data issues:
Predict ice-on in northern lakes from other lakes ice-on dates
n_iceoff_wide = dat %>%
select(-sampledate)%>%
filter(lakeid %in% c("AL", "BM", "CB", "CR", "SP", "TB", "TR") & metric == "iceoff") %>%
pivot_wider(names_from = lakeid, values_from = daynum)
al_iceoff_model = lm(AL~BM+CB+CR+SP+TB+TR, data=n_iceoff_wide)
summary(al_iceoff_model)
##
## Call:
## lm(formula = AL ~ BM + CB + CR + SP + TB + TR, data = n_iceoff_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.3721 -0.8451 0.1265 0.9507 2.7265
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.10967 3.27070 -0.951 0.3496
## BM -0.03295 0.18851 -0.175 0.8624
## CB 0.24815 0.12648 1.962 0.0594 .
## CR 0.20333 0.17076 1.191 0.2434
## SP 0.31430 0.20384 1.542 0.1339
## TB 0.19500 0.10280 1.897 0.0678 .
## TR 0.09211 0.13278 0.694 0.4934
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.779 on 29 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.9793, Adjusted R-squared: 0.975
## F-statistic: 228.9 on 6 and 29 DF, p-value: < 2.2e-16
plot(n_iceoff_wide$AL, predict(al_iceoff_model, n_iceoff_wide), pch=16)
# actually looks good; use to fill
cb_iceoff_model = lm(CB~AL+BM+CR+SP+TB+TR, data=n_iceoff_wide)
summary(cb_iceoff_model)
##
## Call:
## lm(formula = CB ~ AL + BM + CR + SP + TB + TR, data = n_iceoff_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.3604 -0.8546 0.0970 1.5519 3.4296
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.02887 4.54691 0.666 0.5106
## AL 0.47220 0.24068 1.962 0.0594 .
## BM 0.04037 0.26007 0.155 0.8777
## CR 0.46368 0.22535 2.058 0.0487 *
## SP 0.04008 0.29239 0.137 0.8919
## TB 0.08250 0.14956 0.552 0.5854
## TR -0.14222 0.18277 -0.778 0.4428
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.455 on 29 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.9584, Adjusted R-squared: 0.9498
## F-statistic: 111.5 on 6 and 29 DF, p-value: < 2.2e-16
plot(n_iceoff_wide$CB, predict(cb_iceoff_model, n_iceoff_wide), pch=16)
# really good; go ahead and fill
al_missing_iceoff = n_iceoff_wide %>% filter(is.na(AL))
al_missing_iceoff$iceoff_fill = round(predict(al_iceoff_model, al_missing_iceoff))
al_iceoff_fill = al_missing_iceoff %>%
select(metric, year, iceoff_fill) %>%
rename(daynum_fill = iceoff_fill) %>%
mutate(metric = "iceoff", lakeid = "AL")
cb_missing_iceoff = n_iceoff_wide %>% filter(is.na(CB))
cb_missing_iceoff$iceoff_fill = round(predict(cb_iceoff_model, cb_missing_iceoff))
cb_iceoff_fill = cb_missing_iceoff %>%
select(metric, year, iceoff_fill) %>%
rename(daynum_fill = iceoff_fill) %>%
mutate(metric = "iceoff", lakeid = "CB")
iceoff_fill = bind_rows(al_iceoff_fill, cb_iceoff_fill)
Predict DOC daynum from other variable daynum’s in each northern lake
no_dups = dat %>% group_by(lakeid, year, metric) %>% summarise(N = n()) %>% filter(N == 1)
## `summarise()` has grouped output by 'lakeid', 'year'. You can override using
## the `.groups` argument.
n_lakes_wide = left_join(no_dups, dat) %>%
select(-sampledate) %>%
pivot_wider(names_from = metric, values_from = daynum) %>%
filter(lakeid %in% c("AL", "BM", "CB", "CR", "SP", "TB", "TR"))
## Joining, by = c("lakeid", "year", "metric")
hold_data = na.omit(data.frame(n_lakes_wide %>% ungroup() %>% select(-year, -N))) # , -doc_predict
all = lm(doc_epiMax~(iceon+straton+stratoff+energy+
stability+anoxia_summer+secchi_all+
secchi_openwater+total_zoop_biomass+daphnia_biomass)*lakeid, data=hold_data)
i_o = lm(doc_epiMax~1, data=hold_data)
hold_step = step(i_o, scope=formula(all))
## Start: AIC=1737.66
## doc_epiMax ~ 1
##
## Df Sum of Sq RSS AIC
## + lakeid 6 78192 370019 1705.8
## + stratoff 1 21732 426479 1728.3
## + total_zoop_biomass 1 4122 444089 1737.5
## <none> 448211 1737.7
## + energy 1 3897 444314 1737.7
## + secchi_openwater 1 2877 445334 1738.2
## + daphnia_biomass 1 2510 445700 1738.4
## + straton 1 2041 446170 1738.6
## + secchi_all 1 1318 446893 1739.0
## + anoxia_summer 1 406 447805 1739.5
## + iceon 1 299 447912 1739.5
## + stability 1 12 448199 1739.7
##
## Step: AIC=1705.76
## doc_epiMax ~ lakeid
##
## Df Sum of Sq RSS AIC
## + stratoff 1 6571 363448 1703.7
## + anoxia_summer 1 4428 365591 1705.0
## <none> 370019 1705.8
## + daphnia_biomass 1 2831 367188 1706.0
## + total_zoop_biomass 1 801 369218 1707.3
## + straton 1 432 369587 1707.5
## + secchi_all 1 324 369695 1707.6
## + secchi_openwater 1 139 369880 1707.7
## + iceon 1 132 369887 1707.7
## + energy 1 40 369979 1707.7
## + stability 1 27 369992 1707.7
## - lakeid 6 78192 448211 1737.7
##
## Step: AIC=1703.65
## doc_epiMax ~ lakeid + stratoff
##
## Df Sum of Sq RSS AIC
## + anoxia_summer 1 4750 358698 1702.6
## <none> 363448 1703.7
## + daphnia_biomass 1 2324 361124 1704.2
## + total_zoop_biomass 1 801 362648 1705.2
## + straton 1 586 362862 1705.3
## + iceon 1 542 362907 1705.3
## + secchi_all 1 495 362953 1705.3
## + energy 1 154 363294 1705.6
## + secchi_openwater 1 145 363303 1705.6
## + stability 1 117 363332 1705.6
## - stratoff 1 6571 370019 1705.8
## + stratoff:lakeid 6 13873 349576 1706.7
## - lakeid 6 63031 426479 1728.3
##
## Step: AIC=1702.64
## doc_epiMax ~ lakeid + stratoff + anoxia_summer
##
## Df Sum of Sq RSS AIC
## <none> 358698 1702.6
## + daphnia_biomass 1 2138 356560 1703.3
## - anoxia_summer 1 4750 363448 1703.7
## + total_zoop_biomass 1 758 357940 1704.2
## + secchi_all 1 522 358176 1704.3
## + straton 1 476 358223 1704.3
## + energy 1 144 358555 1704.5
## + stability 1 134 358565 1704.6
## + iceon 1 131 358567 1704.6
## + secchi_openwater 1 83 358615 1704.6
## - stratoff 1 6892 365591 1705.0
## + anoxia_summer:lakeid 6 13688 345010 1705.7
## + stratoff:lakeid 6 12182 346516 1706.7
## - lakeid 6 62932 421630 1727.7
doc_model = lm(doc_epiMax~(iceon+straton+stratoff+energy+
stability+anoxia_summer+secchi_all+
secchi_openwater+total_zoop_biomass+daphnia_biomass)*lakeid,
data=n_lakes_wide,
na.action = na.exclude)
summary(doc_model)
##
## Call:
## lm(formula = doc_epiMax ~ (iceon + straton + stratoff + energy +
## stability + anoxia_summer + secchi_all + secchi_openwater +
## total_zoop_biomass + daphnia_biomass) * lakeid, data = n_lakes_wide,
## na.action = na.exclude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -112.408 -18.508 1.979 21.382 82.409
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.311e+02 1.434e+02 1.612 0.10904
## iceon -4.902e-01 3.104e-01 -1.579 0.11627
## straton -1.220e-02 3.042e-01 -0.040 0.96806
## stratoff 9.291e-01 3.348e-01 2.775 0.00619 **
## energy -2.488e-01 2.152e-01 -1.156 0.24937
## stability -1.471e-01 1.454e-01 -1.012 0.31304
## anoxia_summer -1.244e-01 1.594e-01 -0.781 0.43615
## secchi_all 8.906e-02 7.183e-02 1.240 0.21688
## secchi_openwater -2.327e-01 1.050e-01 -2.215 0.02817 *
## total_zoop_biomass 4.952e-02 6.695e-02 0.740 0.46062
## daphnia_biomass 9.345e-02 7.328e-02 1.275 0.20413
## lakeid.L 4.011e+01 3.961e+02 0.101 0.91948
## lakeid.Q 1.435e+02 3.762e+02 0.382 0.70331
## lakeid.C -1.958e+02 3.882e+02 -0.504 0.61467
## lakeid^4 3.617e+02 3.847e+02 0.940 0.34847
## lakeid^5 -1.680e+02 3.801e+02 -0.442 0.65903
## lakeid^6 -1.908e+02 3.496e+02 -0.546 0.58593
## iceon:lakeid.L -5.231e-01 7.970e-01 -0.656 0.51260
## iceon:lakeid.Q 7.488e-01 8.113e-01 0.923 0.35745
## iceon:lakeid.C 8.301e-01 8.266e-01 1.004 0.31683
## iceon:lakeid^4 -5.708e-01 7.929e-01 -0.720 0.47266
## iceon:lakeid^5 9.560e-01 8.552e-01 1.118 0.26530
## iceon:lakeid^6 9.133e-01 8.420e-01 1.085 0.27969
## straton:lakeid.L 4.962e-01 8.050e-01 0.616 0.53847
## straton:lakeid.Q -1.749e-01 7.761e-01 -0.225 0.82200
## straton:lakeid.C -3.793e-01 8.146e-01 -0.466 0.64211
## straton:lakeid^4 1.276e+00 8.270e-01 1.543 0.12482
## straton:lakeid^5 7.029e-01 8.242e-01 0.853 0.39508
## straton:lakeid^6 -5.570e-01 7.813e-01 -0.713 0.47689
## stratoff:lakeid.L 4.236e-01 9.075e-01 0.467 0.64135
## stratoff:lakeid.Q -1.591e+00 7.816e-01 -2.036 0.04341 *
## stratoff:lakeid.C 4.492e-01 9.173e-01 0.490 0.62500
## stratoff:lakeid^4 -1.372e-01 9.896e-01 -0.139 0.88993
## stratoff:lakeid^5 -5.437e-01 9.269e-01 -0.587 0.55835
## stratoff:lakeid^6 -1.398e+00 7.704e-01 -1.814 0.07152 .
## energy:lakeid.L -4.274e-01 5.804e-01 -0.736 0.46264
## energy:lakeid.Q -4.231e-01 5.481e-01 -0.772 0.44139
## energy:lakeid.C 1.554e-02 5.715e-01 0.027 0.97834
## energy:lakeid^4 -4.046e-01 6.107e-01 -0.663 0.50858
## energy:lakeid^5 2.961e-01 5.623e-01 0.527 0.59928
## energy:lakeid^6 5.166e-01 5.398e-01 0.957 0.33997
## stability:lakeid.L 1.717e-01 3.633e-01 0.473 0.63707
## stability:lakeid.Q 5.557e-01 3.887e-01 1.430 0.15482
## stability:lakeid.C -2.151e-01 3.939e-01 -0.546 0.58572
## stability:lakeid^4 -1.023e+00 3.235e-01 -3.162 0.00188 **
## stability:lakeid^5 6.405e-02 4.223e-01 0.152 0.87963
## stability:lakeid^6 -3.525e-01 4.077e-01 -0.865 0.38860
## anoxia_summer:lakeid.L -2.248e-01 4.792e-01 -0.469 0.63960
## anoxia_summer:lakeid.Q 6.326e-01 4.635e-01 1.365 0.17421
## anoxia_summer:lakeid.C -4.707e-01 4.141e-01 -1.137 0.25745
## anoxia_summer:lakeid^4 -1.564e-01 4.457e-01 -0.351 0.72619
## anoxia_summer:lakeid^5 -4.895e-01 3.366e-01 -1.454 0.14785
## anoxia_summer:lakeid^6 1.068e+00 3.725e-01 2.867 0.00471 **
## secchi_all:lakeid.L -5.570e-02 1.628e-01 -0.342 0.73267
## secchi_all:lakeid.Q -3.857e-01 1.760e-01 -2.192 0.02986 *
## secchi_all:lakeid.C -2.896e-01 1.979e-01 -1.463 0.14542
## secchi_all:lakeid^4 1.813e-01 1.575e-01 1.151 0.25132
## secchi_all:lakeid^5 3.066e-01 2.277e-01 1.347 0.18000
## secchi_all:lakeid^6 4.941e-01 2.084e-01 2.371 0.01894 *
## secchi_openwater:lakeid.L 3.880e-01 2.567e-01 1.511 0.13270
## secchi_openwater:lakeid.Q -6.669e-02 2.750e-01 -0.243 0.80870
## secchi_openwater:lakeid.C 4.389e-01 2.879e-01 1.524 0.12943
## secchi_openwater:lakeid^4 1.363e-01 2.269e-01 0.601 0.54880
## secchi_openwater:lakeid^5 -6.341e-01 3.161e-01 -2.006 0.04656 *
## secchi_openwater:lakeid^6 -3.994e-01 2.959e-01 -1.350 0.17896
## total_zoop_biomass:lakeid.L -2.801e-03 1.743e-01 -0.016 0.98720
## total_zoop_biomass:lakeid.Q -2.498e-01 1.837e-01 -1.360 0.17580
## total_zoop_biomass:lakeid.C -4.307e-01 1.800e-01 -2.393 0.01790 *
## total_zoop_biomass:lakeid^4 1.652e-02 1.539e-01 0.107 0.91468
## total_zoop_biomass:lakeid^5 -1.730e-01 1.856e-01 -0.932 0.35270
## total_zoop_biomass:lakeid^6 2.574e-01 1.833e-01 1.404 0.16220
## daphnia_biomass:lakeid.L -1.258e-01 2.072e-01 -0.607 0.54461
## daphnia_biomass:lakeid.Q 1.845e-01 2.030e-01 0.909 0.36470
## daphnia_biomass:lakeid.C 3.940e-01 1.953e-01 2.018 0.04532 *
## daphnia_biomass:lakeid^4 -8.202e-02 1.928e-01 -0.426 0.67104
## daphnia_biomass:lakeid^5 2.391e-01 1.826e-01 1.309 0.19233
## daphnia_biomass:lakeid^6 -2.512e-01 1.811e-01 -1.387 0.16731
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 38.94 on 158 degrees of freedom
## (38 observations deleted due to missingness)
## Multiple R-squared: 0.4674, Adjusted R-squared: 0.2112
## F-statistic: 1.824 on 76 and 158 DF, p-value: 0.0008178
n_lakes_wide$doc_predict = predict(doc_model, n_lakes_wide)
cor = n_lakes_wide %>%
group_by(lakeid) %>%
summarise(r = paste("r =", round(cor(doc_epiMax, doc_predict, use="complete.obs"), 3)))
n_lakes_wide %>%
ggplot(aes(x=doc_epiMax, y=doc_predict, color=as.character(lakeid))) +
geom_point()+
theme_bw() +
labs(color="lakeid")+
geom_abline(slope=1, intercept = 0) +
facet_wrap(~lakeid) +
geom_text(aes(label=r), data=cor, x=300, y=0) +
labs(x="obs peak DOC (daynum)", y="modeled peak DOC (daynum)")
## Warning: Removed 38 rows containing missing values (`geom_point()`).
# not good but vaguely positive relationship; use this instead of median?
missing_doc = n_lakes_wide %>% filter(is.na(doc_epiMax))
missing_doc$doc_fill = round(predict(doc_model, missing_doc))
doc_fill = missing_doc %>%
select(lakeid, year, doc_fill) %>%
rename(daynum_fill = doc_fill) %>%
mutate(metric = "doc_epiMax") %>%
filter(year < 2000)
**Using dates from bad/uncalibrated chl data
filled_chl_dates = read_csv("Data/derived/chl_filled_peaks.csv") %>%
rename(daynum_fill = daynum) %>%
select(-sampledate)
## Rows: 36 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): lakeid, metric
## dbl (2): year, daynum
## date (1): sampledate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
all_fill = bind_rows(iceoff_fill, doc_fill, filled_chl_dates)
dat_filled = full_join(dat, all_fill) %>%
mutate(filled_flag = ifelse(is.na(daynum) & !is.na(daynum_fill), TRUE, FALSE)) %>%
mutate(daynum_fill = ifelse(is.na(daynum) & !is.na(daynum_fill), daynum_fill, daynum))
## Joining, by = c("lakeid", "year", "metric")
dat_filled$lakeid = factor(dat_filled$lakeid,
levels = c("AL", "BM", "CB", "CR", "SP", "TB", "TR", "FI", "ME", "MO", "WI"),
ordered = T)
vars = c("iceoff", "straton", "secchi_all", "secchi_openwater", "daphnia_biomass","zoopDensity",
"total_zoop_biomass", "chlor_all", "chlor_spring", "chlor_fall", "doc_epiMax",
"totpuf_epiMax", "totpuf_epiMin", "totpuf_hypoMax", "totpuf_hypoMin",
"anoxia", "anoxia_summer", "stability", "energy", "stratoff", "iceon")
dat_filled$metric = factor(dat_filled$metric,
levels = rev(vars),
ordered=T)
dat_filled %>%
ggplot(aes(year, metric, fill=is.na(daynum_fill))) +
geom_tile(color="grey") +
theme_bw() +
facet_wrap(~lakeid, nrow=6) # +
# labs(fill = "Missing") +
# scale_fill_viridis_c()
Trim to “good” years, fill FI ice data with Monona, then fill additional missing with median values for each lake/metric
df_yearsWant = dat_filled %>%
filter((lakeid %in% c("FI", "ME", "MO", "WI") & year %in% 1996:2018) |
(!(lakeid %in% c("FI", "ME", "MO", "WI")) & year %in% 1982:2018))
fi_icefill = df_yearsWant %>%
filter(lakeid == "MO" & metric %in% c("iceoff", "iceon")) %>%
mutate(lakeid = "FI") %>%
select(-daynum, -filled_flag, -sampledate) %>%
rename(icefill = daynum_fill)
df_yearsWant = df_yearsWant %>%
full_join(fi_icefill) %>%
mutate(daynum_fill = ifelse(is.na(daynum_fill) & !is.na(icefill),
icefill, daynum),
filled_flag = ifelse(is.na(daynum) & !is.na(daynum_fill) & !is.na(icefill),
TRUE, filled_flag))
## Joining, by = c("lakeid", "year", "metric")
df_yearsWant %>%
ggplot(aes(year, metric, fill=filled_flag)) +
geom_tile(color="grey") +
theme_bw() +
facet_wrap(~lakeid, nrow=6)
all_lys = df_yearsWant %>%
select(lakeid, year) %>%
distinct()
metric = unique(df_yearsWant$metric)
all_lys_want = expand_grid(all_lys, metric)
dat_final = left_join(all_lys_want, df_yearsWant)
## Joining, by = c("lakeid", "year", "metric")
medians = dat_final %>%
group_by(lakeid, metric) %>%
summarise(median_daynum = median(daynum_fill, na.rm=T))
## `summarise()` has grouped output by 'lakeid'. You can override using the
## `.groups` argument.
dat_final = dat_final %>%
left_join(medians) %>%
mutate(daynum_fill = ifelse(is.na(daynum_fill), median_daynum, daynum_fill)) %>%
mutate(filled_flag = ifelse(is.na(daynum) & !is.na(daynum_fill), TRUE, filled_flag)) %>%
select(-icefill, -median_daynum)
## Joining, by = c("lakeid", "metric")
dat_final %>%
ggplot(aes(year, metric, fill=daynum_fill)) +
geom_tile(color="grey") +
theme_bw() +
facet_wrap(~lakeid, nrow=6)
write_csv(dat_final, "Data/analysis_ready/final_combined_dates_filled_v2.csv")
========================================================================================= ## Old code: no longer used
no_dups = dat %>% group_by(lakeid, year, metric) %>% summarise(N = n()) %>% filter(N == 1)
## `summarise()` has grouped output by 'lakeid', 'year'. You can override using
## the `.groups` argument.
s_lakes_wide = left_join(no_dups, dat) %>%
select(-sampledate) %>%
pivot_wider(names_from = metric, values_from = daynum) %>%
filter(lakeid %in% c("FI", "ME", "MO", "WI"))
## Joining, by = c("lakeid", "year", "metric")
# stepwise
hold_data = na.omit(data.frame(s_lakes_wide %>% filter(lakeid != "FI") %>% select(-chlor_spring, -chlor_fall, -daphnia_biomass, -total_zoop_biomass)))
all = lm(chlor_all~., data=hold_data)
i_o = lm(chlor_all~1, data=hold_data)
hold = step(i_o, scope=formula(all))
## Start: AIC=336.42
## chlor_all ~ 1
##
## Df Sum of Sq RSS AIC
## + lakeid 1 23765.6 147231 332.44
## + totpuf_epiMin 1 11056.3 159940 335.75
## + stratoff 1 9424.6 161572 336.15
## <none> 170996 336.42
## + secchi_openwater 1 6617.2 164379 336.84
## + straton 1 3156.5 167840 337.68
## + secchi_all 1 2841.7 168155 337.75
## + zoopDensity 1 2542.1 168454 337.82
## + anoxia_summer 1 1765.7 169231 338.01
## + totpuf_epiMax 1 1110.1 169886 338.16
## + doc_epiMax 1 1019.7 169977 338.18
## + totpuf_hypoMin 1 669.8 170327 338.26
## + year 1 81.3 170915 338.40
## + iceon 1 75.2 170921 338.40
## + energy 1 23.2 170973 338.42
## + stability 1 21.0 170975 338.42
## + totpuf_hypoMax 1 3.0 170993 338.42
## + iceoff 1 0.7 170996 338.42
##
## Step: AIC=332.44
## chlor_all ~ lakeid
##
## Df Sum of Sq RSS AIC
## + stratoff 1 9424.6 137806 331.79
## + straton 1 7685.7 139545 332.29
## <none> 147231 332.44
## + totpuf_epiMin 1 7104.0 140127 332.46
## + zoopDensity 1 4245.0 142986 333.26
## + anoxia_summer 1 3720.5 143510 333.41
## + secchi_all 1 2301.3 144929 333.80
## + iceon 1 2087.2 145144 333.86
## + secchi_openwater 1 1484.8 145746 334.03
## + doc_epiMax 1 837.4 146393 334.21
## + energy 1 572.3 146658 334.28
## + totpuf_hypoMax 1 391.2 146840 334.33
## + iceoff 1 380.4 146850 334.33
## + stability 1 253.0 146978 334.37
## + totpuf_epiMax 1 204.1 147027 334.38
## + year 1 81.3 147149 334.41
## + totpuf_hypoMin 1 38.6 147192 334.42
## - lakeid 1 23765.6 170996 336.42
##
## Step: AIC=331.79
## chlor_all ~ lakeid + stratoff
##
## Df Sum of Sq RSS AIC
## <none> 137806 331.79
## + straton 1 5781.8 132024 332.07
## - stratoff 1 9424.6 147231 332.44
## + totpuf_epiMin 1 4535.5 133271 332.45
## + anoxia_summer 1 3362.7 134443 332.80
## + stability 1 2242.3 135564 333.13
## + zoopDensity 1 2237.4 135569 333.13
## + secchi_openwater 1 1590.3 136216 333.32
## + secchi_all 1 1342.3 136464 333.40
## + totpuf_epiMax 1 966.9 136839 333.51
## + doc_epiMax 1 875.3 136931 333.53
## + iceon 1 732.2 137074 333.58
## + energy 1 698.6 137108 333.59
## + iceoff 1 320.6 137486 333.70
## + year 1 109.6 137697 333.76
## + totpuf_hypoMin 1 99.0 137707 333.76
## + totpuf_hypoMax 1 93.2 137713 333.76
## - lakeid 1 23765.6 161572 336.15
chlAll_model_MEMOWI = lm(chlor_all~anoxia_summer + lakeid + iceon + stability + straton + stratoff,
data=s_lakes_wide %>% filter(lakeid != "FI"))
summary(chlAll_model_MEMOWI)
##
## Call:
## lm(formula = chlor_all ~ anoxia_summer + lakeid + iceon + stability +
## straton + stratoff, data = s_lakes_wide %>% filter(lakeid !=
## "FI"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -115.40 -44.11 0.33 41.41 122.46
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -149.4119 271.2274 -0.551 0.5841
## anoxia_summer 0.6454 0.3620 1.783 0.0806 .
## lakeid.L -32.6015 26.4849 -1.231 0.2240
## lakeid.Q -32.9677 19.6738 -1.676 0.0999 .
## iceon 0.9296 0.6700 1.388 0.1713
## stability -0.5412 0.2784 -1.944 0.0574 .
## straton 0.6513 0.3881 1.678 0.0995 .
## stratoff -0.4440 0.5099 -0.871 0.3880
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 63.02 on 51 degrees of freedom
## (16 observations deleted due to missingness)
## Multiple R-squared: 0.2751, Adjusted R-squared: 0.1756
## F-statistic: 2.765 on 7 and 51 DF, p-value: 0.01627
plot(s_lakes_wide %>% filter(lakeid != "FI") %>% pull(chlor_all),
predict(chlAll_model_MEMOWI, s_lakes_wide %>% filter(lakeid != "FI")),
col=s_lakes_wide$lakeid, pch=16, xlab="observed", ylab="predicted")
# using to fill for now
missing_chlAll_MEMOWI = s_lakes_wide %>% filter(is.na(chlor_all) & lakeid != "FI")
missing_chlAll_MEMOWI$chl_all_fill = round(predict(chlAll_model_MEMOWI, missing_chlAll_MEMOWI))
chlAll_model_FI = lm(chlor_all~(stratoff+energy+
stability+anoxia_summer+secchi_all+
secchi_openwater),
data=s_lakes_wide %>% filter(lakeid == "FI"))
summary(chlAll_model_FI)
##
## Call:
## lm(formula = chlor_all ~ (stratoff + energy + stability + anoxia_summer +
## secchi_all + secchi_openwater), data = s_lakes_wide %>% filter(lakeid ==
## "FI"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -83.354 -31.601 1.305 37.879 109.887
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 890.13470 511.21808 1.741 0.1072
## stratoff 0.43672 1.22603 0.356 0.7279
## energy -3.12342 1.40241 -2.227 0.0458 *
## stability -0.23441 0.77265 -0.303 0.7668
## anoxia_summer -0.24990 0.65794 -0.380 0.7107
## secchi_all -0.04527 0.22865 -0.198 0.8464
## secchi_openwater -0.21407 0.26752 -0.800 0.4392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 63.82 on 12 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.329, Adjusted R-squared: -0.006451
## F-statistic: 0.9808 on 6 and 12 DF, p-value: 0.4788
plot(s_lakes_wide %>% filter(lakeid == "FI") %>% pull(chlor_all),
predict(chlAll_model_FI, s_lakes_wide %>% filter(lakeid == "FI")),
col=s_lakes_wide$lakeid, pch=16)
missing_chlAll_FI = s_lakes_wide %>% filter(is.na(chlor_all) & lakeid == "FI")
missing_chlAll_FI$chl_all_fill = round(predict(chlAll_model_FI, missing_chlAll_FI))
# gives negative value; DONT fill FI
chlAll_fill = missing_chlAll_MEMOWI %>%
select(lakeid, year, chl_all_fill) %>%
rename(daynum_fill = chl_all_fill) %>%
mutate(metric = "chlor_all")